Diagnostics and
Model Evaluation

Lecture 03

Dr. Colin Rundel

Some more linear models

Linear model and data

# Scatterplot of the data with an OLS fit line overlaid (no confidence band)
ggplot(d, aes(x=x,y=y)) + 
  geom_point() + 
  geom_smooth(method="lm", color="blue", se = FALSE)

Linear model

# Frequentist reference fit: simple linear regression of y on x.
# (`<-` instead of `=` for assignment, per the tidyverse style guide)
l <- lm(y ~ x, data = d)

summary(l)
## 
## Call:
## lm(formula = y ~ x, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6041 -1.2142 -0.1973  1.1969  3.7072 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.030315   0.310326   -3.32  0.00126 ** 
## x            0.068409   0.005335   12.82  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.54 on 98 degrees of freedom
## Multiple R-squared:  0.6266, Adjusted R-squared:  0.6227 
## F-statistic: 164.4 on 1 and 98 DF,  p-value: < 2.2e-16

Bayesian model (brms)

# Bayesian analogue of the lm fit, with weakly informative priors:
#   Intercept ~ Normal(0, 100), slope(s) ~ Normal(0, 10),
#   sigma ~ Cauchy(0, 2) (effectively half-Cauchy since sigma > 0).
# silent = 2 / refresh = 0 suppress Stan's sampling progress output;
# the outer parentheses print the fitted model.
( b = brms::brm(
    y ~ x, data=d,
    prior = c(
      brms::prior(normal(0, 100), class = Intercept),
      brms::prior(normal(0, 10),  class = b),
      brms::prior(cauchy(0, 2),   class = sigma)
    ),
    silent = 2, refresh = 0
  )
)
## Running /usr/lib64/R/bin/R CMD SHLIB foo.c
## gcc -m64 -I"/usr/include/R" -DNDEBUG   -I"/usr/lib64/R/library/Rcpp/include/"  -I"/usr/lib64/R/library/RcppEigen/include/"  -I"/usr/lib64/R/library/RcppEigen/include/unsupported"  -I"/usr/lib64/R/library/BH/include" -I"/usr/lib64/R/library/StanHeaders/include/src/"  -I"/usr/lib64/R/library/StanHeaders/include/"  -I"/usr/lib64/R/library/RcppParallel/include/"  -I"/usr/lib64/R/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/usr/lib64/R/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fpic  -O2  -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1  -m64  -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection  -c foo.c -o foo.o
## In file included from /usr/lib64/R/library/RcppEigen/include/Eigen/Dense:1,
##                  from /usr/lib64/R/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
##                  from <command-line>:
## /usr/lib64/R/library/RcppEigen/include/Eigen/Core:82:12: fatal error: new: No such file or directory
##    82 |   #include <new>
##       |            ^~~~~
## compilation terminated.
## make: *** [/usr/lib64/R/etc/Makeconf:168: foo.o] Error 1
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: y ~ x 
##    Data: d (Number of observations: 100) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept    -1.03      0.31    -1.63    -0.41 1.00     3997     2965
## x             0.07      0.01     0.06     0.08 1.00     4475     3119
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.56      0.11     1.35     1.80 1.00     3388     2778
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Parameter estimates

plot(b)

tidybayes

# Long-format posterior draws: one row per (parameter, draw), with the
# grouping by parameter and chain carried into downstream summaries/plots
b_post = b |>
  tidybayes::gather_draws(b_Intercept, b_x, sigma) |>
  group_by(.variable, .chain)

head(b_post)
## # A tibble: 6 × 5
## # Groups:   .variable, .chain [1]
##   .chain .iteration .draw .variable   .value
##    <int>      <int> <int> <chr>        <dbl>
## 1      1          1     1 b_Intercept -1.22 
## 2      1          2     2 b_Intercept -0.804
## 3      1          3     3 b_Intercept -0.847
## 4      1          4     4 b_Intercept -1.11 
## 5      1          5     5 b_Intercept -1.08 
## 6      1          6     6 b_Intercept -0.593
tail(b_post)
## # A tibble: 6 × 5
## # Groups:   .variable, .chain [1]
##   .chain .iteration .draw .variable .value
##    <int>      <int> <int> <chr>      <dbl>
## 1      4        995  3995 sigma       1.55
## 2      4        996  3996 sigma       1.53
## 3      4        997  3997 sigma       1.50
## 4      4        998  3998 sigma       1.74
## 5      4        999  3999 sigma       1.49
## 6      4       1000  4000 sigma       1.54

Posterior plots

# Marginal posterior densities, one panel per parameter,
# overlaying the four chains to check that they agree
ggplot(b_post, aes(x = .value, group = .chain, fill = as.factor(.chain))) +
  geom_density(color = NA, alpha = 0.33) +
  facet_wrap(~.variable, scales = "free")

Trace plots

# Trace plots: parameter value vs iteration, one panel per parameter x chain,
# with a loess smooth to make drift visible.
# Fixes: |> for consistency with the rest of the deck, and `scales=` spelled
# out -- facet_grid's argument is `scales`; the original relied on partial
# argument matching with `scale=`.
b_post |>
  ggplot(aes(x=.iteration, y=.value, color=as.factor(.chain))) +
  geom_line(alpha=0.5) +
  facet_grid(.variable~.chain, scales="free_y") +
  geom_smooth(method="loess") + labs(color="chain")

Credible Intervals

# Posterior means with 80% and 95% highest-density intervals, computed
# separately for each parameter and chain (per b_post's grouping)
(b_ci = tidybayes::mean_hdi(b_post, .value, .width=c(0.8, 0.95)))
## # A tibble: 24 × 8
##    .variable   .chain  .value  .lower  .upper .width .point .interval
##    <chr>        <int>   <dbl>   <dbl>   <dbl>  <dbl> <chr>  <chr>    
##  1 b_Intercept      1 -1.04   -1.44   -0.660     0.8 mean   hdi      
##  2 b_Intercept      2 -1.03   -1.47   -0.662     0.8 mean   hdi      
##  3 b_Intercept      3 -1.02   -1.40   -0.621     0.8 mean   hdi      
##  4 b_Intercept      4 -1.01   -1.46   -0.651     0.8 mean   hdi      
##  5 b_x              1  0.0686  0.0610  0.0746    0.8 mean   hdi      
##  6 b_x              2  0.0686  0.0622  0.0758    0.8 mean   hdi      
##  7 b_x              3  0.0681  0.0605  0.0738    0.8 mean   hdi      
##  8 b_x              4  0.0683  0.0619  0.0757    0.8 mean   hdi      
##  9 sigma            1  1.55    1.41    1.68      0.8 mean   hdi      
## 10 sigma            2  1.56    1.39    1.68      0.8 mean   hdi      
## # … with 14 more rows

Aside - mean_qi vs mean_hdi

`mean_qi()` uses the equal-tailed quantile interval (e.g. the 2.5% and 97.5% quantiles for a 95% interval), while `mean_hdi()` uses the highest-density interval — the narrowest interval containing the stated probability mass. The two agree for symmetric, unimodal posteriors but can differ noticeably for skewed or multimodal ones.

Caterpillar Plots

# Caterpillar plot: point estimate + nested 80%/95% intervals per chain.
# Fixes: |> for pipe consistency, and geom_pointinterval() -- the
# *h (horizontal) tidybayes geoms are deprecated/removed; orientation is
# now taken from the x/y aesthetics, which are already explicit here.
b_ci |>
  ggplot(aes(x=.value, y=.chain, color=as.factor(.chain))) + 
  facet_grid(.variable~.) +
  tidybayes::geom_pointinterval() +
  ylim(0.5,4.5)

Predictions

lm predictions

# Fitted values and their standard errors at the observed x
# (SEs are for the mean response, not for new observations)
(l_pred = predict(l, se.fit = TRUE))
## $fit
##           1           2           3           4           5           6 
## -0.96190573 -0.89349639 -0.82508706 -0.75667772 -0.68826838 -0.61985904 
##           7           8           9          10          11          12 
## -0.55144970 -0.48304037 -0.41463103 -0.34622169 -0.27781235 -0.20940301 
##          13          14          15          16          17          18 
## -0.14099368 -0.07258434 -0.00417500  0.06423434  0.13264368  0.20105301 
##          19          20          21          22          23          24 
##  0.26946235  0.33787169  0.40628103  0.47469037  0.54309970  0.61150904 
##          25          26          27          28          29          30 
##  0.67991838  0.74832772  0.81673706  0.88514639  0.95355573  1.02196507 
##          31          32          33          34          35          36 
##  1.09037441  1.15878375  1.22719308  1.29560242  1.36401176  1.43242110 
##          37          38          39          40          41          42 
##  1.50083044  1.56923977  1.63764911  1.70605845  1.77446779  1.84287713 
##          43          44          45          46          47          48 
##  1.91128646  1.97969580  2.04810514  2.11651448  2.18492382  2.25333315 
##          49          50          51          52          53          54 
##  2.32174249  2.39015183  2.45856117  2.52697051  2.59537984  2.66378918 
##          55          56          57          58          59          60 
##  2.73219852  2.80060786  2.86901720  2.93742654  3.00583587  3.07424521 
##          61          62          63          64          65          66 
##  3.14265455  3.21106389  3.27947323  3.34788256  3.41629190  3.48470124 
##          67          68          69          70          71          72 
##  3.55311058  3.62151992  3.68992925  3.75833859  3.82674793  3.89515727 
##          73          74          75          76          77          78 
##  3.96356661  4.03197594  4.10038528  4.16879462  4.23720396  4.30561330 
##          79          80          81          82          83          84 
##  4.37402263  4.44243197  4.51084131  4.57925065  4.64765999  4.71606932 
##          85          86          87          88          89          90 
##  4.78447866  4.85288800  4.92129734  4.98970668  5.05811601  5.12652535 
##          91          92          93          94          95          96 
##  5.19493469  5.26334403  5.33175337  5.40016270  5.46857204  5.53698138 
##          97          98          99         100 
##  5.60539072  5.67380006  5.74220939  5.81061873 
## 
## $se.fit
##   [1] 0.3057054 0.3011088 0.2965369 0.2919909 0.2874720 0.2829816 0.2785209
##   [8] 0.2740915 0.2696948 0.2653326 0.2610065 0.2567184 0.2524702 0.2482640
##  [15] 0.2441019 0.2399862 0.2359194 0.2319039 0.2279426 0.2240384 0.2201941
##  [22] 0.2164131 0.2126987 0.2090545 0.2054842 0.2019917 0.1985811 0.1952567
##  [29] 0.1920231 0.1888847 0.1858466 0.1829136 0.1800909 0.1773838 0.1747977
##  [36] 0.1723379 0.1700101 0.1678196 0.1657719 0.1638723 0.1621262 0.1605384
##  [43] 0.1591137 0.1578566 0.1567710 0.1558606 0.1551285 0.1545770 0.1542084
##  [50] 0.1540237 0.1540237 0.1542084 0.1545770 0.1551285 0.1558606 0.1567710
##  [57] 0.1578566 0.1591137 0.1605384 0.1621262 0.1638723 0.1657719 0.1678196
##  [64] 0.1700101 0.1723379 0.1747977 0.1773838 0.1800909 0.1829136 0.1858466
##  [71] 0.1888847 0.1920231 0.1952567 0.1985811 0.2019917 0.2054842 0.2090545
##  [78] 0.2126987 0.2164131 0.2201941 0.2240384 0.2279426 0.2319039 0.2359194
##  [85] 0.2399862 0.2441019 0.2482640 0.2524702 0.2567184 0.2610065 0.2653326
##  [92] 0.2696948 0.2740915 0.2785209 0.2829816 0.2874720 0.2919909 0.2965369
##  [99] 0.3011088 0.3057054
## 
## $df
## [1] 98
## 
## $residual.scale
## [1] 1.540006
# Overlay the lm fit and an approximate 95% interval for the *mean*
# response (fit +/- 1.96 SE) -- note this is narrower than a prediction
# interval for new observations.
# Fixes: single pipe style (|>) instead of mixing %>% and |> in one chain,
# and linewidth= instead of the size= aesthetic deprecated for lines in
# ggplot2 3.4.
d |>
  mutate(pred = l_pred$fit, pred_se = l_pred$se.fit) |>
  ggplot(aes(x=x,y=y)) + 
  geom_point() +
  geom_line(aes(y=pred), col="blue", linewidth=1.5) +
  geom_ribbon(aes(ymin=pred-1.96*pred_se, ymax=pred+1.96*pred_se), col="blue", fill="blue", alpha=0.33)

brms predictions

:::: {.columns}
::: {.column width="50%"}

(b_pred = predict(b))
##             Estimate Est.Error        Q2.5    Q97.5
##   [1,] -9.609645e-01  1.561926 -4.01203654 2.161372
##   [2,] -8.880984e-01  1.594119 -3.97947145 2.277786
##   [3,] -7.858581e-01  1.603036 -3.91171841 2.394677
##   [4,] -7.342974e-01  1.600227 -3.88127484 2.333750
##   [5,] -6.782995e-01  1.588578 -3.77348838 2.465124
##   [6,] -6.059220e-01  1.600517 -3.77323299 2.491334
##   [7,] -5.634273e-01  1.589095 -3.73139156 2.565575
##   [8,] -4.503677e-01  1.583472 -3.51313223 2.644334
##   [9,] -4.645129e-01  1.579031 -3.51067470 2.690915
##  [10,] -3.522931e-01  1.567633 -3.40355989 2.732593
##  [11,] -2.699042e-01  1.578862 -3.40128176 2.788625
##  [12,] -2.006014e-01  1.596025 -3.25372207 2.961250
##  [13,] -1.792603e-01  1.587733 -3.24881918 3.027675
##  [14,] -3.106034e-02  1.577195 -3.19431229 3.157428
##  [15,]  3.371975e-05  1.585501 -3.14993225 3.075848
##  [16,]  1.081268e-01  1.573322 -2.95850787 3.243933
##  [17,]  1.355748e-01  1.560098 -2.95157709 3.164764
##  [18,]  1.845004e-01  1.585099 -2.93906314 3.266374
##  [19,]  2.766531e-01  1.581550 -2.80988107 3.434141
##  [20,]  4.153054e-01  1.573901 -2.64239309 3.525921
##  [21,]  4.469171e-01  1.545544 -2.55743704 3.588765
##  [22,]  4.938189e-01  1.562802 -2.58361713 3.620882
##  [23,]  5.534011e-01  1.575582 -2.54490409 3.690555
##  [24,]  5.791366e-01  1.561923 -2.50197889 3.580695
##  [25,]  6.886781e-01  1.568868 -2.39069102 3.711175
##  [26,]  7.304538e-01  1.542950 -2.23618923 3.780113
##  [27,]  8.111103e-01  1.557839 -2.23458326 3.853192
##  [28,]  8.842770e-01  1.551534 -2.20155673 3.764824
##  [29,]  9.719294e-01  1.547406 -1.98618115 4.056526
##  [30,]  1.040635e+00  1.555397 -2.06757786 4.033709
##  [31,]  1.085562e+00  1.554451 -1.94667292 4.145884
##  [32,]  1.176402e+00  1.576074 -1.82352499 4.377211
##  [33,]  1.273333e+00  1.572840 -1.77642008 4.412295
##  [34,]  1.286405e+00  1.565852 -1.79057972 4.394935
##  [35,]  1.387419e+00  1.549509 -1.61096039 4.399009
##  [36,]  1.446683e+00  1.589065 -1.70731975 4.511346
##  [37,]  1.532664e+00  1.572597 -1.64708517 4.554928
##  [38,]  1.568771e+00  1.558224 -1.48300565 4.653967
##  [39,]  1.623324e+00  1.612817 -1.54905590 4.761668
##  [40,]  1.678814e+00  1.567379 -1.47357233 4.762586
##  [41,]  1.782974e+00  1.573712 -1.29631145 4.885892
##  [42,]  1.877479e+00  1.548194 -1.08297442 4.912650
##  [43,]  1.921552e+00  1.535607 -1.07306871 5.016000
##  [44,]  1.998369e+00  1.559130 -1.02331607 5.076560
##  [45,]  2.035075e+00  1.539819 -0.89178174 5.048298
##  [46,]  2.176537e+00  1.542257 -0.83156750 5.314918
##  [47,]  2.227979e+00  1.586014 -0.87991712 5.398825
##  [48,]  2.277262e+00  1.557772 -0.83417569 5.413789
##  [49,]  2.367287e+00  1.605126 -0.86534482 5.546315
##  [50,]  2.398062e+00  1.574125 -0.67690326 5.558285
##  [51,]  2.473210e+00  1.585363 -0.71756560 5.538187
##  [52,]  2.523591e+00  1.599935 -0.60272954 5.631386
##  [53,]  2.629534e+00  1.573425 -0.44059930 5.707797
##  [54,]  2.672699e+00  1.567681 -0.36905585 5.741345
##  [55,]  2.739328e+00  1.572254 -0.34296399 5.883309
##  [56,]  2.813801e+00  1.563977 -0.28567175 5.875150
##  [57,]  2.911577e+00  1.587793 -0.24721442 6.012408
##  [58,]  2.957987e+00  1.560586 -0.18865579 6.092150
##  [59,]  2.996499e+00  1.579324 -0.11718341 6.132627
##  [60,]  3.077360e+00  1.588873  0.01756065 6.164071
##  [61,]  3.132668e+00  1.601346  0.09221999 6.298732
##  [62,]  3.184913e+00  1.560496  0.14461880 6.291114
##  [63,]  3.272213e+00  1.552482  0.21894055 6.324977
##  [64,]  3.326517e+00  1.579606  0.25671196 6.601024
##  [65,]  3.394134e+00  1.591410  0.29382205 6.501127
##  [66,]  3.453681e+00  1.538657  0.40587011 6.478285
##  [67,]  3.517779e+00  1.583323  0.32795000 6.716798
##  [68,]  3.628035e+00  1.586191  0.52193517 6.727908
##  [69,]  3.692430e+00  1.616687  0.46691154 6.856565
##  [70,]  3.732652e+00  1.576940  0.64516332 6.770240
##  [71,]  3.844680e+00  1.591000  0.80007719 7.000680
##  [72,]  3.940249e+00  1.570755  0.87033093 6.974891
##  [73,]  3.950405e+00  1.567254  0.88325371 6.990208
##  [74,]  4.026070e+00  1.591825  0.88927647 7.229415
##  [75,]  4.142334e+00  1.584918  1.04124886 7.210401
##  [76,]  4.140091e+00  1.551480  1.10399434 7.204764
##  [77,]  4.224362e+00  1.570117  1.20355810 7.289300
##  [78,]  4.304548e+00  1.581696  1.24309806 7.390254
##  [79,]  4.345522e+00  1.541654  1.36296433 7.513612
##  [80,]  4.431965e+00  1.576116  1.41507989 7.576618
##  [81,]  4.497625e+00  1.586494  1.34112248 7.666660
##  [82,]  4.577816e+00  1.571520  1.37327435 7.648531
##  [83,]  4.663426e+00  1.594505  1.49724813 7.752342
##  [84,]  4.682267e+00  1.566057  1.59204355 7.702763
##  [85,]  4.769504e+00  1.617151  1.58635342 7.915213
##  [86,]  4.850687e+00  1.576331  1.76594388 7.984613
##  [87,]  4.946223e+00  1.556935  2.00953729 8.047708
##  [88,]  4.988648e+00  1.588766  1.85061577 8.077427
##  [89,]  5.020983e+00  1.603189  1.82841168 8.091616
##  [90,]  5.145746e+00  1.600265  1.99977662 8.328021
##  [91,]  5.157781e+00  1.579005  2.03439294 8.169977
##  [92,]  5.278350e+00  1.605617  2.06296166 8.433425
##  [93,]  5.332391e+00  1.577969  2.28086164 8.499105
##  [94,]  5.422939e+00  1.619846  2.27769826 8.596465
##  [95,]  5.474140e+00  1.579001  2.40617385 8.603363
##  [96,]  5.543493e+00  1.611510  2.37950601 8.739808
##  [97,]  5.621012e+00  1.581118  2.44687661 8.660984
##  [98,]  5.630782e+00  1.609789  2.38839453 8.735881
##  [99,]  5.725313e+00  1.600023  2.60614682 8.959030
## [100,]  5.829044e+00  1.593171  2.70807670 8.920203

:::

::: {.column width="50%"}

# Overlay the brms posterior-predictive mean and its 95% interval
# (Q2.5/Q97.5 include observation noise, hence wider than the lm CI above).
# Fixes: single pipe style (|>) instead of mixing %>% and |>, and
# linewidth= instead of the deprecated size= aesthetic for lines.
d |>
  bind_cols(b_pred) |>
  ggplot(aes(x=x,y=y)) + 
  geom_point() +
  geom_line(aes(y=Estimate), col="red", linewidth=1.5) +
  geom_ribbon(aes(ymin=Q2.5, ymax=Q97.5), col='red', fill='red', alpha=0.33)

Why are the intervals different?

Raw predictions

dim( brms::posterior_predict(b) )
## [1] 4000  100
dim( brms::posterior_epred(b) )
## [1] 4000  100

Tidy raw predictions

( b_post_pred = tidybayes::predicted_draws(b, newdata=d) )
## # A tibble: 400,000 × 7
## # Groups:   x, y, .row [100]
##        x     y  .row .chain .iteration .draw .prediction
##    <int> <dbl> <int>  <int>      <int> <int>       <dbl>
##  1     1 -3.24     1     NA         NA     1       3.53 
##  2     1 -3.24     1     NA         NA     2      -3.13 
##  3     1 -3.24     1     NA         NA     3      -0.912
##  4     1 -3.24     1     NA         NA     4       0.333
##  5     1 -3.24     1     NA         NA     5      -2.53 
##  6     1 -3.24     1     NA         NA     6      -6.33 
##  7     1 -3.24     1     NA         NA     7      -2.63 
##  8     1 -3.24     1     NA         NA     8      -2.46 
##  9     1 -3.24     1     NA         NA     9      -1.22 
## 10     1 -3.24     1     NA         NA    10      -0.821
## # … with 399,990 more rows
( b_post_epred = tidybayes::epred_draws(b, newdata=d) )
## # A tibble: 400,000 × 7
## # Groups:   x, y, .row [100]
##        x     y  .row .chain .iteration .draw .epred
##    <int> <dbl> <int>  <int>      <int> <int>  <dbl>
##  1     1 -3.24     1     NA         NA     1 -1.15 
##  2     1 -3.24     1     NA         NA     2 -0.737
##  3     1 -3.24     1     NA         NA     3 -0.782
##  4     1 -3.24     1     NA         NA     4 -1.04 
##  5     1 -3.24     1     NA         NA     5 -1.01 
##  6     1 -3.24     1     NA         NA     6 -0.530
##  7     1 -3.24     1     NA         NA     7 -1.23 
##  8     1 -3.24     1     NA         NA     8 -0.673
##  9     1 -3.24     1     NA         NA     9 -1.88 
## 10     1 -3.24     1     NA         NA    10 -1.43 
## # … with 399,990 more rows

Posterior predictions vs Expected posterior predictions

# Draw 25 individual posterior-predictive trajectories over the data
# (each line is one draw, so the spread shows predictive uncertainty)
ggplot(filter(b_post_pred, .draw <= 25), aes(x = x, y = y)) +
  geom_point() +
  geom_line(aes(y = .prediction, group = .draw), alpha = 0.33)

# Same idea for the expected (mean) prediction -- no observation noise,
# so these lines hug the fitted relationship much more tightly
b_post_epred |>
  filter(.draw <= 25) |>
  ggplot(aes(x=x,y=y)) +
    geom_point() +
    geom_line(aes(y=.epred, group=.draw), alpha=0.33)

Credible intervals

# Credible bands for the posterior predictive distribution
# (stat_lineribbon defaults to nested 50/80/95% intervals)
b_post_pred |>
  ggplot(aes(x=x, y=y)) +
    geom_point() +
    tidybayes::stat_lineribbon(aes(y=.prediction), alpha=0.25)

# Credible bands for the expected prediction (uncertainty in the mean only)
b_post_epred |>
  ggplot(aes(x=x, y=y)) +
    geom_point() +
    tidybayes::stat_lineribbon(aes(y=.epred), alpha=0.25)

Posterior predictive checks

brms::pp_check(b, ndraws = 25)

Residuals - lm

# Residuals vs x for the lm fit (broom::augment() adds .resid, .fitted,
# etc. to the model's data); look for curvature or changing spread
l |>
  broom::augment() |>
  ggplot(aes(x=x, y=.resid)) +
    geom_point() +
    geom_hline(yintercept=0, color='grey', linetype=2)

Residuals - brms

# Posterior residuals for the brms fit: at each x, a point interval
# summarizing the distribution of residual draws.
# Fix: |> for pipe consistency -- the chain already used |> on its
# second step, mixing the two pipe operators in one expression.
b |>
  tidybayes::residual_draws(newdata=d) |>
  ggplot(aes(x=x, y=.residual, group=x)) +
    tidybayes::stat_pointinterval() +
    geom_hline(yintercept = 0, color='grey', linetype=2)

Model Evaluation

Model assessment?

If we think back to our first regression class, one common option is \(R^2\) which gives us the variability in \(Y\) explained by our model.

Quick review:

\[ \underset{\text{Total}}{\sum_{i=1}^n \left(Y_i - \bar{Y}\right)^2} = \underset{\text{Model}}{\sum_{i=1}^n \left(\hat{Y}_i - \bar{Y}\right)^2} + \underset{\text{Error }}{\sum_{i=1}^n \left(Y_i - \hat{Y}_i\right)^2} \]

\[ R^2 = \frac{SS_{model}}{SS_{total}} = \frac{\sum_{i=1}^n \left(\hat{Y}_i - \bar{Y}\right)^2}{\sum_{i=1}^n \left(Y_i - \bar{Y}\right)^2} = \frac{\text{Var}(\hat{\boldsymbol{Y}})}{\text{Var}(\boldsymbol{Y})} = \text{Cor}(\boldsymbol{Y}, \hat{\boldsymbol{Y}})^2 \]